
set more off

* ---------------------------------------------------------------------------
* Assemble the working dataset.
* Start from the yield/rainfall series and attach, in order:
*   1. factory data by size class   (keys: asicode88 year)
*   2. district expenditure data    (keys: asicode88 year)
*   3. aghioncollapse data          (keys: statecode88 year)
*   4. wheat/rice cultivation data  (key:  statenm)
* All merges use the modern syntax. It sorts internally, so no explicit sort
* is needed, and it makes the expected key cardinality explicit: m:1 stops with
* an error if the using file has duplicate keys, instead of silently producing
* an m:m-style match as the old "merge varlist using" syntax does.
* ---------------------------------------------------------------------------
use "$data\yield_rain_longseries.dta", clear
* NOTE(review): duplicates are dropped on (asicode year) but the merges below
* key on (asicode88 year); the 1:m merge requires the master to be unique on
* the latter. Confirm the two codes coincide in this file.
duplicates drop asicode year, force /*one duplicate district in yield data needs to be dropped*/

* One master row per district-year matched to possibly several using rows.
merge 1:m asicode88 year using "$base\data\district889194_small_medium_large_factories"
drop _merge

* From here on the master can hold several rows per key, hence m:1.
merge m:1 asicode88 year using "$data\chariexp.dta" /* this is district expenditure data*/
drop _merge

merge m:1 statecode88 year using "$data\aghioncollapse.dta"
drop _merge

* _merge from this last merge is intentionally left in the data, as before.
merge m:1 statenm using "$data\wheatrice80.dta" /* this is data on wheat and rice cultivation by state*/

* Corrected coding of the labor-regulation strictness index.
* Precedence (highest first): proe==1 -> -1, prow==1 -> 1, nstrict==0 -> 0;
* anything else stays missing.
generate strictness = cond(proe == 1, -1, cond(prow == 1, 1, cond(nstrict == 0, 0, .)))

* Rescale factory outcomes.
* mandays is divided by 1000; the monetary-looking variables are divided by
* (48.9*1000).
* NOTE(review): the meaning of the 48.9 factor is not documented here - confirm.
local money_scale (48.9*1000)
replace mandays = mandays/1000
foreach outcome of varlist fixedcapital* workerswages totaloutput materials fuel electricity valueadded profits {
	replace `outcome' = `outcome'/`money_scale'
}

* Per-worker versions of selected outcomes: each new variable is the matching
* numerator divided by workers (lists are paired by position).
local pw_new manwork outwork     factwage     kbyl
local pw_num mandays totaloutput workerswages fixedcapitalop
forvalues k = 1/4 {
	local target : word `k' of `pw_new'
	local source : word `k' of `pw_num'
	generate `target' = `source'/workers
}

* Baseline controls: for each listed variable, create <name>88 holding the sum
* of its year==1987 values within each (asicode88, size) cell, copied to every
* row of that cell.
* NOTE(review): egen total() treats missing as zero, so a cell with no
* non-missing 1987 value gets 0 rather than missing - confirm this is intended.
foreach ctrl in FDIreform clre delicense ubt77 ubt90 cmhdlft yield wage factwage kbyl {
	tempvar snap
	generate `snap' = `ctrl' if year == 1987
	bysort asicode88 size: egen `ctrl'88 = total(`snap')
	drop `snap'
}

* Trim percentage variables: wherever the base variable <stem>88 is missing or
* above 100, blank out every variable whose name starts with <stem>88
* (including <stem>88 itself).
foreach stem in agrarian kqratio food landed landless {
	foreach member of varlist `stem'88* {
		replace `member' = . if `stem'88 > 100 | `stem'88 == .
	}
}

* delicense and the crop/cultivated-area variables are missing for some
* districts; fill them from state-level (statecode88) means.

* delicense: state mean applied to every observation with year > 1979
* (only keeping aghion years).
* NOTE(review): this overwrites non-missing values too, and the mean is taken
* over all years within the state - confirm that is the intent.
bysort statecode88 (year): egen meandel = mean(delicense)
replace delicense = meandel if year > 1979

* Area variables: state mean used only where the value is missing.
foreach area in wheatareaper80 riceareaper80 cultarea {
	bysort statecode88: egen mean`area' = mean(`area')
	replace `area' = mean`area' if `area' == .
}

* Additional prep: interaction terms and group identifiers.
* For each of medium, large and size, build
*   shockpct_<regime>_<grp> = shockpctile * regime * grp
*   shockpct_<grp>          = shockpctile * grp
*   <regime>_<grp>          = regime * grp
* with regime in {nstrict, prow, proe}.
foreach grp in medium large size {
	foreach regime in nstrict prow proe {
		generate shockpct_`regime'_`grp' = shockpctile*`regime'*`grp'
	}
	generate shockpct_`grp' = shockpctile*`grp'
	foreach regime in nstrict prow proe {
		generate `regime'_`grp' = `regime'*`grp'
	}
}
generate shockpct_nstrict = shockpctile*nstrict

* Group ids for district-by-size and shock-percentile-by-size cells.
* NOTE(review): ds is built from asicode, while earlier steps key on
* asicode88 - confirm which code is intended.
egen ds = group(asicode size)
egen shocksize = group(shockpctile size)

